{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to DataFrames\n",
    "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), November 20, 2020**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "using DataFrames # load package"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reshaping DataFrames"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Wide to long"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>M1</th><th>M2</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>Int64</th><th>Int64</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>1</td><td>1</td><td>11</td><td>111</td></tr><tr><th>2</th><td>2</td><td>1</td><td>12</td><td>112</td></tr><tr><th>3</th><td>3</td><td>2</td><td>13</td><td>113</td></tr><tr><th>4</th><td>4</td><td>2</td><td>14</td><td>114</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & M1 & M2\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Int64 & Int64 & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & 1 & 11 & 111 \\\\\n",
       "\t2 & 2 & 1 & 12 & 112 \\\\\n",
       "\t3 & 3 & 2 & 13 & 113 \\\\\n",
       "\t4 & 4 & 2 & 14 & 114 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m4×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2   \u001b[0m\u001b[1m M1    \u001b[0m\u001b[1m M2    \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n",
       "─────┼────────────────────────────\n",
       "   1 │     1      1     11    111\n",
       "   2 │     2      1     12    112\n",
       "   3 │     3      2     13    113\n",
       "   4 │     4      2     14    114"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = DataFrame(id=[1,2,3,4], id2=[1,1,2,2], M1=[11,12,13,14], M2=[111,112,113,114])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>variable</th><th>value</th></tr><tr><th></th><th>Int64</th><th>String</th><th>Int64</th></tr></thead><tbody><p>8 rows × 3 columns</p><tr><th>1</th><td>1</td><td>M1</td><td>11</td></tr><tr><th>2</th><td>2</td><td>M1</td><td>12</td></tr><tr><th>3</th><td>3</td><td>M1</td><td>13</td></tr><tr><th>4</th><td>4</td><td>M1</td><td>14</td></tr><tr><th>5</th><td>1</td><td>M2</td><td>111</td></tr><tr><th>6</th><td>2</td><td>M2</td><td>112</td></tr><tr><th>7</th><td>3</td><td>M2</td><td>113</td></tr><tr><th>8</th><td>4</td><td>M2</td><td>114</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccc}\n",
       "\t& id & variable & value\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & String & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & M1 & 11 \\\\\n",
       "\t2 & 2 & M1 & 12 \\\\\n",
       "\t3 & 3 & M1 & 13 \\\\\n",
       "\t4 & 4 & M1 & 14 \\\\\n",
       "\t5 & 1 & M2 & 111 \\\\\n",
       "\t6 & 2 & M2 & 112 \\\\\n",
       "\t7 & 3 & M2 & 113 \\\\\n",
       "\t8 & 4 & M2 & 114 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m8×3 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m variable \u001b[0m\u001b[1m value \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m String   \u001b[0m\u001b[90m Int64 \u001b[0m\n",
       "─────┼────────────────────────\n",
       "   1 │     1  M1           11\n",
       "   2 │     2  M1           12\n",
       "   3 │     3  M1           13\n",
       "   4 │     4  M1           14\n",
       "   5 │     1  M2          111\n",
       "   6 │     2  M2          112\n",
       "   7 │     3  M2          113\n",
       "   8 │     4  M2          114"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stack(x, [:M1, :M2], :id) # first pass measure variables and then id-variable"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "add `view=true` keyword argument to make a view; in that case columns of the resulting data frame share memory with columns of the source data frame, so the operation is potentially unsafe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>key</th><th>observed</th></tr><tr><th></th><th>Int64</th><th>String</th><th>Int64</th></tr></thead><tbody><p>8 rows × 3 columns</p><tr><th>1</th><td>1</td><td>M1</td><td>11</td></tr><tr><th>2</th><td>2</td><td>M1</td><td>12</td></tr><tr><th>3</th><td>3</td><td>M1</td><td>13</td></tr><tr><th>4</th><td>4</td><td>M1</td><td>14</td></tr><tr><th>5</th><td>1</td><td>M2</td><td>111</td></tr><tr><th>6</th><td>2</td><td>M2</td><td>112</td></tr><tr><th>7</th><td>3</td><td>M2</td><td>113</td></tr><tr><th>8</th><td>4</td><td>M2</td><td>114</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccc}\n",
       "\t& id & key & observed\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & String & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & M1 & 11 \\\\\n",
       "\t2 & 2 & M1 & 12 \\\\\n",
       "\t3 & 3 & M1 & 13 \\\\\n",
       "\t4 & 4 & M1 & 14 \\\\\n",
       "\t5 & 1 & M2 & 111 \\\\\n",
       "\t6 & 2 & M2 & 112 \\\\\n",
       "\t7 & 3 & M2 & 113 \\\\\n",
       "\t8 & 4 & M2 & 114 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m8×3 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m key    \u001b[0m\u001b[1m observed \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64    \u001b[0m\n",
       "─────┼─────────────────────────\n",
       "   1 │     1  M1            11\n",
       "   2 │     2  M1            12\n",
       "   3 │     3  M1            13\n",
       "   4 │     4  M1            14\n",
       "   5 │     1  M2           111\n",
       "   6 │     2  M2           112\n",
       "   7 │     3  M2           113\n",
       "   8 │     4  M2           114"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# optionally you can rename columns\n",
    "stack(x, [\"M1\", \"M2\"], \"id\", variable_name=\"key\", value_name=\"observed\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "if second argument is omitted in `stack` , all other columns are assumed to be the id-variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>variable</th><th>value</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>String</th><th>Int64</th></tr></thead><tbody><p>8 rows × 4 columns</p><tr><th>1</th><td>1</td><td>1</td><td>M1</td><td>11</td></tr><tr><th>2</th><td>2</td><td>1</td><td>M1</td><td>12</td></tr><tr><th>3</th><td>3</td><td>2</td><td>M1</td><td>13</td></tr><tr><th>4</th><td>4</td><td>2</td><td>M1</td><td>14</td></tr><tr><th>5</th><td>1</td><td>1</td><td>M2</td><td>111</td></tr><tr><th>6</th><td>2</td><td>1</td><td>M2</td><td>112</td></tr><tr><th>7</th><td>3</td><td>2</td><td>M2</td><td>113</td></tr><tr><th>8</th><td>4</td><td>2</td><td>M2</td><td>114</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & variable & value\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Int64 & String & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & 1 & M1 & 11 \\\\\n",
       "\t2 & 2 & 1 & M1 & 12 \\\\\n",
       "\t3 & 3 & 2 & M1 & 13 \\\\\n",
       "\t4 & 4 & 2 & M1 & 14 \\\\\n",
       "\t5 & 1 & 1 & M2 & 111 \\\\\n",
       "\t6 & 2 & 1 & M2 & 112 \\\\\n",
       "\t7 & 3 & 2 & M2 & 113 \\\\\n",
       "\t8 & 4 & 2 & M2 & 114 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m8×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2   \u001b[0m\u001b[1m variable \u001b[0m\u001b[1m value \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String   \u001b[0m\u001b[90m Int64 \u001b[0m\n",
       "─────┼───────────────────────────────\n",
       "   1 │     1      1  M1           11\n",
       "   2 │     2      1  M1           12\n",
       "   3 │     3      2  M1           13\n",
       "   4 │     4      2  M1           14\n",
       "   5 │     1      1  M2          111\n",
       "   6 │     2      1  M2          112\n",
       "   7 │     3      2  M2          113\n",
       "   8 │     4      2  M2          114"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stack(x, Not([:id, :id2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>variable</th><th>value</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>String</th><th>Int64</th></tr></thead><tbody><p>8 rows × 4 columns</p><tr><th>1</th><td>1</td><td>1</td><td>M1</td><td>11</td></tr><tr><th>2</th><td>2</td><td>1</td><td>M1</td><td>12</td></tr><tr><th>3</th><td>3</td><td>2</td><td>M1</td><td>13</td></tr><tr><th>4</th><td>4</td><td>2</td><td>M1</td><td>14</td></tr><tr><th>5</th><td>1</td><td>1</td><td>M2</td><td>111</td></tr><tr><th>6</th><td>2</td><td>1</td><td>M2</td><td>112</td></tr><tr><th>7</th><td>3</td><td>2</td><td>M2</td><td>113</td></tr><tr><th>8</th><td>4</td><td>2</td><td>M2</td><td>114</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & variable & value\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Int64 & String & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & 1 & M1 & 11 \\\\\n",
       "\t2 & 2 & 1 & M1 & 12 \\\\\n",
       "\t3 & 3 & 2 & M1 & 13 \\\\\n",
       "\t4 & 4 & 2 & M1 & 14 \\\\\n",
       "\t5 & 1 & 1 & M2 & 111 \\\\\n",
       "\t6 & 2 & 1 & M2 & 112 \\\\\n",
       "\t7 & 3 & 2 & M2 & 113 \\\\\n",
       "\t8 & 4 & 2 & M2 & 114 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m8×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2   \u001b[0m\u001b[1m variable \u001b[0m\u001b[1m value \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String   \u001b[0m\u001b[90m Int64 \u001b[0m\n",
       "─────┼───────────────────────────────\n",
       "   1 │     1      1  M1           11\n",
       "   2 │     2      1  M1           12\n",
       "   3 │     3      2  M1           13\n",
       "   4 │     4      2  M1           14\n",
       "   5 │     1      1  M2          111\n",
       "   6 │     2      1  M2          112\n",
       "   7 │     3      2  M2          113\n",
       "   8 │     4      2  M2          114"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stack(x, Not([1, 2])) # you can use index instead of symbol"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>Float64</th><th>Float64</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>0.78947</td><td>0.319372</td></tr><tr><th>2</th><td>1</td><td>b</td><td>0.115416</td><td>0.213197</td></tr><tr><th>3</th><td>1</td><td>c</td><td>0.404406</td><td>0.701783</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & a1 & a2\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & Float64 & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & 0.78947 & 0.319372 \\\\\n",
       "\t2 & 1 & b & 0.115416 & 0.213197 \\\\\n",
       "\t3 & 1 & c & 0.404406 & 0.701783 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m3×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m a1       \u001b[0m\u001b[1m a2       \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Float64  \u001b[0m\u001b[90m Float64  \u001b[0m\n",
       "─────┼─────────────────────────────────\n",
       "   1 │     1  a     0.78947   0.319372\n",
       "   2 │     1  b     0.115416  0.213197\n",
       "   3 │     1  c     0.404406  0.701783"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " if `stack` is not passed any measure variables by default numeric variables are selected as measures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>variable</th><th>value</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>String</th><th>Float64</th></tr></thead><tbody><p>6 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>a1</td><td>0.78947</td></tr><tr><th>2</th><td>1</td><td>b</td><td>a1</td><td>0.115416</td></tr><tr><th>3</th><td>1</td><td>c</td><td>a1</td><td>0.404406</td></tr><tr><th>4</th><td>1</td><td>a</td><td>a2</td><td>0.319372</td></tr><tr><th>5</th><td>1</td><td>b</td><td>a2</td><td>0.213197</td></tr><tr><th>6</th><td>1</td><td>c</td><td>a2</td><td>0.701783</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & variable & value\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & String & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & a1 & 0.78947 \\\\\n",
       "\t2 & 1 & b & a1 & 0.115416 \\\\\n",
       "\t3 & 1 & c & a1 & 0.404406 \\\\\n",
       "\t4 & 1 & a & a2 & 0.319372 \\\\\n",
       "\t5 & 1 & b & a2 & 0.213197 \\\\\n",
       "\t6 & 1 & c & a2 & 0.701783 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m6×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m variable \u001b[0m\u001b[1m value    \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m String   \u001b[0m\u001b[90m Float64  \u001b[0m\n",
       "─────┼─────────────────────────────────\n",
       "   1 │     1  a     a1        0.78947\n",
       "   2 │     1  b     a1        0.115416\n",
       "   3 │     1  c     a1        0.404406\n",
       "   4 │     1  a     a2        0.319372\n",
       "   5 │     1  b     a2        0.213197\n",
       "   6 │     1  c     a2        0.701783"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stack(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "here all columns are treated as measures:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th></tr><tr><th></th><th>String</th><th>Float64</th></tr></thead><tbody><p>6 rows × 2 columns</p><tr><th>1</th><td>x1</td><td>0.578647</td></tr><tr><th>2</th><td>x1</td><td>0.868801</td></tr><tr><th>3</th><td>x1</td><td>0.320898</td></tr><tr><th>4</th><td>x2</td><td>0.842124</td></tr><tr><th>5</th><td>x2</td><td>0.690447</td></tr><tr><th>6</th><td>x2</td><td>0.0933694</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cc}\n",
       "\t& variable & value\\\\\n",
       "\t\\hline\n",
       "\t& String & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & x1 & 0.578647 \\\\\n",
       "\t2 & x1 & 0.868801 \\\\\n",
       "\t3 & x1 & 0.320898 \\\\\n",
       "\t4 & x2 & 0.842124 \\\\\n",
       "\t5 & x2 & 0.690447 \\\\\n",
       "\t6 & x2 & 0.0933694 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m6×2 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m variable \u001b[0m\u001b[1m value     \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m String   \u001b[0m\u001b[90m Float64   \u001b[0m\n",
       "─────┼─────────────────────\n",
       "   1 │ x1        0.578647\n",
       "   2 │ x1        0.868801\n",
       "   3 │ x1        0.320898\n",
       "   4 │ x2        0.842124\n",
       "   5 │ x2        0.690447\n",
       "   6 │ x2        0.0933694"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stack(DataFrame(rand(3,2), :auto))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>key</th><th>variable</th><th>value</th></tr><tr><th></th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>6 rows × 3 columns</p><tr><th>1</th><td>1</td><td>x1</td><td>0.873212</td></tr><tr><th>2</th><td>1</td><td>x1</td><td>0.606487</td></tr><tr><th>3</th><td>1</td><td>x1</td><td>0.941399</td></tr><tr><th>4</th><td>1</td><td>x2</td><td>0.238134</td></tr><tr><th>5</th><td>1</td><td>x2</td><td>0.475324</td></tr><tr><th>6</th><td>1</td><td>x2</td><td>0.486745</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccc}\n",
       "\t& key & variable & value\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & String & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & x1 & 0.873212 \\\\\n",
       "\t2 & 1 & x1 & 0.606487 \\\\\n",
       "\t3 & 1 & x1 & 0.941399 \\\\\n",
       "\t4 & 1 & x2 & 0.238134 \\\\\n",
       "\t5 & 1 & x2 & 0.475324 \\\\\n",
       "\t6 & 1 & x2 & 0.486745 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m6×3 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m key   \u001b[0m\u001b[1m variable \u001b[0m\u001b[1m value    \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m String   \u001b[0m\u001b[90m Float64  \u001b[0m\n",
       "─────┼───────────────────────────\n",
       "   1 │     1  x1        0.873212\n",
       "   2 │     1  x1        0.606487\n",
       "   3 │     1  x1        0.941399\n",
       "   4 │     1  x2        0.238134\n",
       "   5 │     1  x2        0.475324\n",
       "   6 │     1  x2        0.486745"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = DataFrame(rand(3,2), :auto)\n",
    "df.key = [1,1,1]\n",
    "mdf = stack(df) # duplicates in key are silently accepted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Long to wide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>Float64</th><th>Float64</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>0.736476</td><td>0.804705</td></tr><tr><th>2</th><td>1</td><td>b</td><td>0.501599</td><td>0.0438823</td></tr><tr><th>3</th><td>1</td><td>c</td><td>0.493851</td><td>0.248967</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & a1 & a2\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & Float64 & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & 0.736476 & 0.804705 \\\\\n",
       "\t2 & 1 & b & 0.501599 & 0.0438823 \\\\\n",
       "\t3 & 1 & c & 0.493851 & 0.248967 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m3×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m a1       \u001b[0m\u001b[1m a2        \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Float64  \u001b[0m\u001b[90m Float64   \u001b[0m\n",
       "─────┼──────────────────────────────────\n",
       "   1 │     1  a     0.736476  0.804705\n",
       "   2 │     1  b     0.501599  0.0438823\n",
       "   3 │     1  c     0.493851  0.248967"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>variable</th><th>value</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>String</th><th>Float64</th></tr></thead><tbody><p>6 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>a1</td><td>0.736476</td></tr><tr><th>2</th><td>1</td><td>b</td><td>a1</td><td>0.501599</td></tr><tr><th>3</th><td>1</td><td>c</td><td>a1</td><td>0.493851</td></tr><tr><th>4</th><td>1</td><td>a</td><td>a2</td><td>0.804705</td></tr><tr><th>5</th><td>1</td><td>b</td><td>a2</td><td>0.0438823</td></tr><tr><th>6</th><td>1</td><td>c</td><td>a2</td><td>0.248967</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & variable & value\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & String & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & a1 & 0.736476 \\\\\n",
       "\t2 & 1 & b & a1 & 0.501599 \\\\\n",
       "\t3 & 1 & c & a1 & 0.493851 \\\\\n",
       "\t4 & 1 & a & a2 & 0.804705 \\\\\n",
       "\t5 & 1 & b & a2 & 0.0438823 \\\\\n",
       "\t6 & 1 & c & a2 & 0.248967 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m6×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m variable \u001b[0m\u001b[1m value     \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m String   \u001b[0m\u001b[90m Float64   \u001b[0m\n",
       "─────┼──────────────────────────────────\n",
       "   1 │     1  a     a1        0.736476\n",
       "   2 │     1  b     a1        0.501599\n",
       "   3 │     1  c     a1        0.493851\n",
       "   4 │     1  a     a2        0.804705\n",
       "   5 │     1  b     a2        0.0438823\n",
       "   6 │     1  c     a2        0.248967"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = stack(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id2</th><th>a1</th><th>a2</th></tr><tr><th></th><th>Char</th><th>Float64?</th><th>Float64?</th></tr></thead><tbody><p>3 rows × 3 columns</p><tr><th>1</th><td>a</td><td>0.736476</td><td>0.804705</td></tr><tr><th>2</th><td>b</td><td>0.501599</td><td>0.0438823</td></tr><tr><th>3</th><td>c</td><td>0.493851</td><td>0.248967</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccc}\n",
       "\t& id2 & a1 & a2\\\\\n",
       "\t\\hline\n",
       "\t& Char & Float64? & Float64?\\\\\n",
       "\t\\hline\n",
       "\t1 & a & 0.736476 & 0.804705 \\\\\n",
       "\t2 & b & 0.501599 & 0.0438823 \\\\\n",
       "\t3 & c & 0.493851 & 0.248967 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m3×3 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id2  \u001b[0m\u001b[1m a1       \u001b[0m\u001b[1m a2        \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Char \u001b[0m\u001b[90m Float64? \u001b[0m\u001b[90m Float64?  \u001b[0m\n",
       "─────┼───────────────────────────\n",
       "   1 │ a     0.736476  0.804705\n",
       "   2 │ b     0.501599  0.0438823\n",
       "   3 │ c     0.493851  0.248967"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unstack(y, :id2, :variable, :value) # stndard unstack with a specified key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>Float64?</th><th>Float64?</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>0.736476</td><td>0.804705</td></tr><tr><th>2</th><td>1</td><td>b</td><td>0.501599</td><td>0.0438823</td></tr><tr><th>3</th><td>1</td><td>c</td><td>0.493851</td><td>0.248967</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & a1 & a2\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & Float64? & Float64?\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & 0.736476 & 0.804705 \\\\\n",
       "\t2 & 1 & b & 0.501599 & 0.0438823 \\\\\n",
       "\t3 & 1 & c & 0.493851 & 0.248967 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m3×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m a1       \u001b[0m\u001b[1m a2        \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Float64? \u001b[0m\u001b[90m Float64?  \u001b[0m\n",
       "─────┼──────────────────────────────────\n",
       "   1 │     1  a     0.736476  0.804705\n",
       "   2 │     1  b     0.501599  0.0438823\n",
       "   3 │     1  c     0.493851  0.248967"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unstack(y, :variable, :value) # all other columns are treated as keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>Float64?</th><th>Float64?</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>0.736476</td><td>0.804705</td></tr><tr><th>2</th><td>1</td><td>b</td><td>0.501599</td><td>0.0438823</td></tr><tr><th>3</th><td>1</td><td>c</td><td>0.493851</td><td>0.248967</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & a1 & a2\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & Float64? & Float64?\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & 0.736476 & 0.804705 \\\\\n",
       "\t2 & 1 & b & 0.501599 & 0.0438823 \\\\\n",
       "\t3 & 1 & c & 0.493851 & 0.248967 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m3×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m a1       \u001b[0m\u001b[1m a2        \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Float64? \u001b[0m\u001b[90m Float64?  \u001b[0m\n",
       "─────┼──────────────────────────────────\n",
       "   1 │     1  a     0.736476  0.804705\n",
       "   2 │     1  b     0.501599  0.0438823\n",
       "   3 │     1  c     0.493851  0.248967"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# all columns other than named :variable and :value are treated as keys\n",
    "unstack(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>unstacked_a1</th><th>unstacked_a2</th></tr><tr><th></th><th>Int64</th><th>Char</th><th>Float64?</th><th>Float64?</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>a</td><td>0.736476</td><td>0.804705</td></tr><tr><th>2</th><td>1</td><td>b</td><td>0.501599</td><td>0.0438823</td></tr><tr><th>3</th><td>1</td><td>c</td><td>0.493851</td><td>0.248967</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& id & id2 & unstacked\\_a1 & unstacked\\_a2\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Char & Float64? & Float64?\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & a & 0.736476 & 0.804705 \\\\\n",
       "\t2 & 1 & b & 0.501599 & 0.0438823 \\\\\n",
       "\t3 & 1 & c & 0.493851 & 0.248967 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m3×4 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m id2  \u001b[0m\u001b[1m unstacked_a1 \u001b[0m\u001b[1m unstacked_a2 \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Float64?     \u001b[0m\u001b[90m Float64?     \u001b[0m\n",
       "─────┼─────────────────────────────────────────\n",
       "   1 │     1  a         0.736476     0.804705\n",
       "   2 │     1  b         0.501599     0.0438823\n",
       "   3 │     1  c         0.493851     0.248967"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# you can rename the unstacked columns\n",
    "unstack(y, renamecols=n->string(\"unstacked_\", n))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th></tr><tr><th></th><th>String</th><th>Float64</th></tr></thead><tbody><p>6 rows × 2 columns</p><tr><th>1</th><td>x1</td><td>0.358598</td></tr><tr><th>2</th><td>x1</td><td>0.524376</td></tr><tr><th>3</th><td>x1</td><td>0.700511</td></tr><tr><th>4</th><td>x2</td><td>0.148962</td></tr><tr><th>5</th><td>x2</td><td>0.833927</td></tr><tr><th>6</th><td>x2</td><td>0.468924</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cc}\n",
       "\t& variable & value\\\\\n",
       "\t\\hline\n",
       "\t& String & Float64\\\\\n",
       "\t\\hline\n",
       "\t1 & x1 & 0.358598 \\\\\n",
       "\t2 & x1 & 0.524376 \\\\\n",
       "\t3 & x1 & 0.700511 \\\\\n",
       "\t4 & x2 & 0.148962 \\\\\n",
       "\t5 & x2 & 0.833927 \\\\\n",
       "\t6 & x2 & 0.468924 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "\u001b[1m6×2 DataFrame\u001b[0m\n",
       "\u001b[1m Row \u001b[0m│\u001b[1m variable \u001b[0m\u001b[1m value    \u001b[0m\n",
       "\u001b[1m     \u001b[0m│\u001b[90m String   \u001b[0m\u001b[90m Float64  \u001b[0m\n",
       "─────┼────────────────────\n",
       "   1 │ x1        0.358598\n",
       "   2 │ x1        0.524376\n",
       "   3 │ x1        0.700511\n",
       "   4 │ x2        0.148962\n",
       "   5 │ x2        0.833927\n",
       "   6 │ x2        0.468924"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = stack(DataFrame(rand(3,2), :auto))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "ename": "LoadError",
     "evalue": "ArgumentError: No key column found",
     "output_type": "error",
     "traceback": [
      "ArgumentError: No key column found",
      "",
      "Stacktrace:",
      " [1] unstack(::DataFrame, ::InvertedIndex{InvertedIndices.TupleVector{Tuple{Int64,Int64}}}, ::Int64, ::Int64; renamecols::Function, allowmissing::Bool, allowduplicates::Bool) at D:\\AppData\\.julia\\packages\\DataFrames\\X0xNW\\src\\abstractdataframe\\reshape.jl:330",
      " [2] unstack(::DataFrame, ::Symbol, ::Symbol; renamecols::Function, allowmissing::Bool, allowduplicates::Bool) at D:\\AppData\\.julia\\packages\\DataFrames\\X0xNW\\src\\abstractdataframe\\reshape.jl:343",
      " [3] unstack(::DataFrame, ::Symbol, ::Symbol) at D:\\AppData\\.julia\\packages\\DataFrames\\X0xNW\\src\\abstractdataframe\\reshape.jl:341",
      " [4] top-level scope at In[18]:1",
      " [5] include_string(::Function, ::Module, ::String, ::String) at .\\loading.jl:1091"
     ]
    }
   ],
   "source": [
    "unstack(df, :variable, :value) # unable to unstack when no key column is present"
   ]
  }
 ],
 "metadata": {
  "@webio": {
   "lastCommId": null,
   "lastKernelId": null
  },
  "kernelspec": {
   "display_name": "Julia 1.5.3",
   "language": "julia",
   "name": "julia-1.5"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
